1
2
3
4
5
6
7
8
9
10
11 """Restriction Enzyme classes.
12
13 Notes about the diverses class of the restriction enzyme implementation::
14
15 RestrictionType is the type of all restriction enzymes.
16 -----------------------------------------------------------------------
17 AbstractCut implements some methods that are common to all enzymes.
18 -----------------------------------------------------------------------
19 NoCut, OneCut,TwoCuts represent the number of double strand cuts
20 produced by the enzyme.
21 they correspond to the 4th field of the
22 rebase record emboss_e.NNN.
23 0->NoCut : the enzyme is not characterised.
24 2->OneCut : the enzyme produce one double strand cut.
25 4->TwoCuts : two double strand cuts.
26 -----------------------------------------------------------------------
27 Meth_Dep, Meth_Undep represent the methylation susceptibility to
28 the enzyme.
29 Not implemented yet.
30 -----------------------------------------------------------------------
31 Palindromic, if the site is palindromic or not.
32 NotPalindromic allow some optimisations of the code.
33 No need to check the reverse strand
34 with palindromic sites.
35 -----------------------------------------------------------------------
36 Unknown, Blunt, represent the overhang.
37 Ov5, Ov3 Unknown is here for symmetry reasons and
38 correspond to enzymes that are not
39 characterised in rebase.
40 -----------------------------------------------------------------------
41 Defined, Ambiguous, represent the sequence of the overhang.
42 NotDefined
43 NotDefined is for enzymes not characterised
44 in rebase.
45
46 Defined correspond to enzymes that display
47 a constant overhang whatever the sequence.
48 ex : EcoRI. G^AATTC -> overhang :AATT
49 CTTAA^G
50
51 Ambiguous : the overhang varies with the
52 sequence restricted.
53 Typically enzymes which cut outside their
54 restriction site or (but not always)
55 inside an ambiguous site.
56 ex:
57 AcuI CTGAAG(22/20) -> overhang : NN
58 AasI GACNNN^NNNGTC -> overhang : NN
59 CTGN^NNNNNCAG
60
61 note : these 3 classes refers to the overhang not the site.
62 So the enzyme ApoI (RAATTY) is defined even if its
63 restriction site is ambiguous.
64
65 ApoI R^AATTY -> overhang : AATT -> Defined
66 YTTAA^R
67 Accordingly, blunt enzymes are always Defined even
68 when they cut outside their restriction site.
69 -----------------------------------------------------------------------
70 Not_available, as found in rebase file emboss_r.NNN files.
71 Commercially_available
72 allow the selection of the enzymes
73 according to their suppliers to reduce the
74 quantity of results.
75 Also will allow the implementation of
76 buffer compatibility tables. Not
77 implemented yet.
78
79 the list of suppliers is extracted from
80 emboss_s.NNN
81 -----------------------------------------------------------------------
82
83 """
84
85 from __future__ import print_function
86
87 import warnings
88
89 from Bio._py3k import zip
90 from Bio._py3k import filter
91 from Bio._py3k import range
92
93 import re
94 import itertools
95
96 from Bio.Seq import Seq, MutableSeq
97 from Bio.Restriction.Restriction_Dictionary import rest_dict as enzymedict
98 from Bio.Restriction.Restriction_Dictionary import typedict
99 from Bio.Restriction.Restriction_Dictionary import suppliers as suppliers_dict
100 from Bio.Restriction.PrintFormat import PrintFormat
101 from Bio import BiopythonWarning
109 """Check characters in a string (PRIVATE).
110
111 Remove digits and white space present in string. Allows any valid ambiguous
112 IUPAC DNA single letters codes (ABCDGHKMNRSTVWY, lower case are converted).
113
114 Other characters (e.g. symbols) trigger a TypeError.
115
116 Returns the string WITH A LEADING SPACE (!). This is for backwards
117 compatibility, and may in part be explained by the fact that
118 ``Bio.Restriction`` doesn't use zero based counting.
119 """
120
121 seq_string = "".join(seq_string.split()).upper()
122
123 for c in "0123456789":
124 seq_string = seq_string.replace(c, "")
125
126 if not set(seq_string).issubset(set("ABCDGHKMNRSTVWY")):
127 raise TypeError("Invalid character found in %s" % repr(seq_string))
128 return " " + seq_string
129
130
131 matching = {"A": "ARWMHVDN", "C": "CYSMHBVN", "G": "GRSKBVDN",
132 "T": "TYWKHBDN", "R": "ABDGHKMNSRWV", "Y": "CBDHKMNSTWVY",
133 "W": "ABDHKMNRTWVY", "S": "CBDGHKMNSRVY", "M": "ACBDHMNSRWVY",
134 "K": "BDGHKNSRTWVY", "H": "ACBDHKMNSRTWVY",
135 "B": "CBDGHKMNSRTWVY", "V": "ACBDGHKMNSRWVY",
136 "D": "ABDGHKMNSRTWVY", "N": "ACBDGHKMNSRTWVY"}
137
138 DNA = Seq
256
259 """RestrictionType. Type from which all enzyme classes are derived.
260
261 Implement the operator methods.
262 """
263
264 - def __init__(cls, name="", bases=(), dct=None):
265 """Initialize RestrictionType instance.
266
267 Not intended to be used in normal operation. The enzymes are
268 instantiated when importing the module.
269 See below.
270 """
271 if "-" in name:
272 raise ValueError("Problem with hyphen in %s as enzyme name"
273 % repr(name))
274
275
276
277 try:
278 cls.compsite = re.compile(cls.compsite)
279 except AttributeError:
280
281
282
283 pass
284 except Exception:
285 raise ValueError("Problem with regular expression, re.compiled(%s)"
286 % repr(cls.compsite))
287
300
302 """Override '/' operator to use as search method.
303
304 >>> from Bio.Restriction import EcoRI
305 >>> EcoRI/Seq('GAATTC')
306 [2]
307
308 Returns RE.search(other).
309 """
310 return cls.search(other)
311
313 """Override division with reversed operands to use as search method.
314
315 >>> from Bio.Restriction import EcoRI
316 >>> Seq('GAATTC')/EcoRI
317 [2]
318
319 Returns RE.search(other).
320 """
321 return cls.search(other)
322
324 """Override Python 3 division operator to use as search method.
325
326 Like __div__.
327 """
328 return cls.search(other)
329
331 """As __truediv___, with reversed operands.
332
333 Like __rdiv__.
334 """
335 return cls.search(other)
336
338 """Override '//' operator to use as catalyse method.
339
340 >>> from Bio.Restriction import EcoRI
341 >>> EcoRI//Seq('GAATTC')
342 (Seq('G'), Seq('AATTC'))
343
344 Returns RE.catalyse(other).
345 """
346 return cls.catalyse(other)
347
349 """As __floordiv__, with reversed operands.
350
351 >>> from Bio.Restriction import EcoRI
352 >>> Seq('GAATTC')//EcoRI
353 (Seq('G'), Seq('AATTC'))
354
355 Returns RE.catalyse(other).
356 """
357 return cls.catalyse(other)
358
360 """Return the name of the enzyme as string."""
361 return cls.__name__
362
364 """Implement repr method.
365
366 Used with eval or exec will instantiate the enzyme.
367 """
368 return "%s" % cls.__name__
369
371 """Return length of recognition site of enzyme as int."""
372 try:
373 return cls.size
374 except AttributeError:
375
376
377
378
379 return 0
380
382 """Implement ``hash()`` method for ``RestrictionType``.
383
384 Python default is to use ``id(...)``
385 This is consistent with the ``__eq__`` implementation
386 """
387 return id(cls)
388
390 """Override '==' operator.
391
392 True if RE and other are the same enzyme.
393
394 Specifically this checks they are the same Python object.
395 """
396
397 return id(cls) == id(other)
398
400 """Override '!=' operator.
401
402 Isoschizomer strict (same recognition site, same restriction) -> False
403 All the other-> True
404
405 WARNING - This is not the inverse of the __eq__ method
406
407 >>> from Bio.Restriction import SacI, SstI
408 >>> SacI != SstI # true isoschizomers
409 False
410 >>> SacI == SstI
411 False
412 """
413 if not isinstance(other, RestrictionType):
414 return True
415 elif cls.charac == other.charac:
416 return False
417 else:
418 return True
419
421 """Override '>>' operator to test for neoschizomers.
422
423 neoschizomer : same recognition site, different restriction. -> True
424 all the others : -> False
425
426 >>> from Bio.Restriction import SmaI, XmaI
427 >>> SmaI >> XmaI
428 True
429 """
430 if not isinstance(other, RestrictionType):
431 return False
432 elif cls.site == other.site and cls.charac != other.charac:
433 return True
434 else:
435 return False
436
438 """Override '%' operator to test for compatible overhangs.
439
440 True if a and b have compatible overhang.
441
442 >>> from Bio.Restriction import XhoI, SalI
443 >>> XhoI % SalI
444 True
445 """
446 if not isinstance(other, RestrictionType):
447 raise TypeError("expected RestrictionType, got %s instead"
448 % type(other))
449 return cls._mod1(other)
450
452 """Compare length of recognition site of two enzymes.
453
454 Override '>='. a is greater or equal than b if the a site is longer
455 than b site. If their site have the same length sort by alphabetical
456 order of their names.
457
458 >>> from Bio.Restriction import EcoRI, EcoRV
459 >>> EcoRI.size
460 6
461 >>> EcoRV.size
462 6
463 >>> EcoRI >= EcoRV
464 False
465 """
466 if not isinstance(other, RestrictionType):
467 raise NotImplementedError
468 if len(cls) > len(other):
469 return True
470 elif cls.size == len(other) and cls.__name__ >= other.__name__:
471 return True
472 else:
473 return False
474
476 """Compare length of recognition site of two enzymes.
477
478 Override '>'. Sorting order:
479
480 1. size of the recognition site.
481 2. if equal size, alphabetical order of the names.
482
483 """
484 if not isinstance(other, RestrictionType):
485 raise NotImplementedError
486 if len(cls) > len(other):
487 return True
488 elif cls.size == len(other) and cls.__name__ > other.__name__:
489 return True
490 else:
491 return False
492
494 """Compare length of recognition site of two enzymes.
495
496 Override '<='. Sorting order:
497
498 1. size of the recognition site.
499 2. if equal size, alphabetical order of the names.
500
501 """
502 if not isinstance(other, RestrictionType):
503 raise NotImplementedError
504 elif len(cls) < len(other):
505 return True
506 elif len(cls) == len(other) and cls.__name__ <= other.__name__:
507 return True
508 else:
509 return False
510
512 """Compare length of recognition site of two enzymes.
513
514 Override '<'. Sorting order:
515
516 1. size of the recognition site.
517 2. if equal size, alphabetical order of the names.
518
519 """
520 if not isinstance(other, RestrictionType):
521 raise NotImplementedError
522 elif len(cls) < len(other):
523 return True
524 elif len(cls) == len(other) and cls.__name__ < other.__name__:
525 return True
526 else:
527 return False
528
531 """Implement the methods that are common to all restriction enzymes.
532
533 All the methods are classmethod.
534
535 For internal use only. Not meant to be instantiated.
536 """
537
@classmethod
def search(cls, dna, linear=True):
    """Return a list of cutting sites of the enzyme in the sequence.

    Compensate for circular sequences and so on.

    dna must be a Bio.Seq.Seq instance or a Bio.Seq.MutableSeq instance.
    A FormattedSeq is also accepted and is used as it is; in that case
    the ``linear`` argument is not consulted.

    If linear is False, the restriction sites that span over the boundaries
    will be included.

    The positions are the first base of the 3' fragment,
    i.e. the first base after the position the enzyme will cut.

    The (possibly wrapped) sequence is stored on the class as ``cls.dna``
    before the search is delegated to ``cls._search()``.
    """
    if not isinstance(dna, FormattedSeq):
        # Wrap plain sequences so that _search() always sees a FormattedSeq.
        dna = FormattedSeq(dna, linear)
    cls.dna = dna
    return cls._search()
565
566 @classmethod
568 """Print all the suppliers of restriction enzyme."""
569 supply = sorted(x[0] for x in suppliers_dict.values())
570 print(",\n".join(supply))
571 return
572
573 @classmethod
575 """Test for real isoschizomer.
576
577 True if other is an isoschizomer of RE, but not an neoschizomer,
578 else False.
579
580 Equischizomer: same site, same position of restriction.
581
582 >>> from Bio.Restriction import SacI, SstI, SmaI, XmaI
583 >>> SacI.is_equischizomer(SstI)
584 True
585 >>> SmaI.is_equischizomer(XmaI)
586 False
587
588 """
589 return not cls != other
590
591 @classmethod
593 """Test for neoschizomer.
594
595 True if other is an isoschizomer of RE, else False.
596 Neoschizomer: same site, different position of restriction.
597 """
598 return cls >> other
599
600 @classmethod
602 """Test for same recognition site.
603
604 True if other has the same recognition site, else False.
605
606 Isoschizomer: same site.
607
608 >>> from Bio.Restriction import SacI, SstI, SmaI, XmaI
609 >>> SacI.is_isoschizomer(SstI)
610 True
611 >>> SmaI.is_isoschizomer(XmaI)
612 True
613
614 """
615 return (not cls != other) or cls >> other
616
617 @classmethod
619 """List equischizomers of the enzyme.
620
621 Return a tuple of all the isoschizomers of RE.
622 If batch is supplied it is used instead of the default AllEnzymes.
623
624 Equischizomer: same site, same position of restriction.
625 """
626 if not batch:
627 batch = AllEnzymes
628 r = [x for x in batch if not cls != x]
629 i = r.index(cls)
630 del r[i]
631 r.sort()
632 return r
633
634 @classmethod
636 """List neoschizomers of the enzyme.
637
638 Return a tuple of all the neoschizomers of RE.
639 If batch is supplied it is used instead of the default AllEnzymes.
640
641 Neoschizomer: same site, different position of restriction.
642 """
643 if not batch:
644 batch = AllEnzymes
645 r = sorted(x for x in batch if cls >> x)
646 return r
647
648 @classmethod
650 """List all isoschizomers of the enzyme.
651
652 Return a tuple of all the equischizomers and neoschizomers of RE.
653 If batch is supplied it is used instead of the default AllEnzymes.
654 """
655 if not batch:
656 batch = AllEnzymes
657 r = [x for x in batch if (cls >> x) or (not cls != x)]
658 i = r.index(cls)
659 del r[i]
660 r.sort()
661 return r
662
663 @classmethod
665 """Return the theoretically cutting frequency of the enzyme.
666
667 Frequency of the site, given as 'one cut per x bases' (int).
668 """
669 return cls.freq
670
671
class NoCut(AbstractCut):
    """Implement the methods specific to the enzymes that do not cut.

    These enzymes are generally enzymes that have been only partially
    characterised and the way they cut the DNA is unknown, or enzymes for
    which the pattern of cut is too complex to be recorded in Rebase
    (ncuts values of 0 in emboss_e.###).

    When using search() with these enzymes the values returned are at the
    start of the restriction site.

    Unknown and NotDefined are also part of the base classes of these
    enzymes.

    Internal use only. Not meant to be instantiated.
    """

    @classmethod
    def cut_once(cls):
        """Return if the cutting pattern has one cut.

        Always False for this class.
        """
        return False

    @classmethod
    def cut_twice(cls):
        """Return if the cutting pattern has two cuts.

        Always False for this class.
        """
        return False

    @classmethod
    def _modify(cls, location):
        """Yield the position reported for a site found at location (PRIVATE).

        For internal use only.

        location is an integer corresponding to the location of the match
        for the enzyme pattern in the sequence. As no cut position is
        recorded for these enzymes, the location is yielded unchanged.
        """
        position = location
        yield position

    @classmethod
    def _rev_modify(cls, location):
        """Yield the position for a site on the other strand (PRIVATE).

        For internal use only.

        As _modify, for a site situated on the antiparallel strand when the
        enzyme is not palindromic. The location is yielded unchanged.
        """
        position = location
        yield position

    @classmethod
    def characteristic(cls):
        """Return the enzyme's characteristics as a tuple.

        The tuple contains the attributes:

        - fst5 -> first 5' cut (current strand) or None
        - fst3 -> first 3' cut (complementary strand) or None
        - scd5 -> second 5' cut (current strand) or None
        - scd3 -> second 3' cut (complementary strand) or None
        - site -> recognition site.

        For this class the four cut positions are always None.
        """
        no_cuts = (None,) * 4
        return no_cuts + (cls.site,)
759
760
class OneCut(AbstractCut):
    """Implement the methods for enzymes that cut the DNA only once.

    Correspond to ncuts values of 2 in emboss_e.###

    Internal use only. Not meant to be instantiated.
    """

    @classmethod
    def cut_once(cls):
        """Return if the cutting pattern has one cut.

        Always True for this class.
        """
        return True

    @classmethod
    def cut_twice(cls):
        """Return if the cutting pattern has two cuts.

        Always False for this class.
        """
        return False

    @classmethod
    def _modify(cls, location):
        """Yield the cut position for a site found at location (PRIVATE).

        For internal use only.

        location is an integer corresponding to the location of the match
        for the enzyme pattern in the sequence. The value yielded is
        ``location + cls.fst5``.

        Example::

            EcoRI pattern : GAATTC
            EcoRI will cut after the G.
            so in the sequence:
                     ______
            GAATACACGGAATTCGA
                     |
                     10
            dna.finditer(GAATTC, 6) will return 10 as G is the 10th base
            EcoRI cut after the G so:
            EcoRI._modify(10) -> 11.

        """
        offset = cls.fst5
        yield location + offset

    @classmethod
    def _rev_modify(cls, location):
        """Yield the cut position for a site on the other strand (PRIVATE).

        For internal use only.

        As _modify, for a site situated on the antiparallel strand when the
        enzyme is not palindromic. The value yielded is
        ``location - cls.fst3``.
        """
        offset = cls.fst3
        yield location - offset

    @classmethod
    def characteristic(cls):
        """Return the enzyme's characteristics as a tuple.

        The tuple contains the attributes:

        - fst5 -> first 5' cut (current strand) or None
        - fst3 -> first 3' cut (complementary strand) or None
        - scd5 -> second 5' cut (current strand) or None
        - scd3 -> second 3' cut (complementary strand) or None
        - site -> recognition site.

        For this class the two "second cut" entries are always None.
        """
        first_cut = (cls.fst5, cls.fst3)
        return first_cut + (None, None, cls.site)
838
841 """Implement the methods for enzymes that cut the DNA twice.
842
843 Correspond to ncuts values of 4 in emboss_e.###
844
845 Internal use only. Not meant to be instantiated.
846 """
847
848 @classmethod
850 """Return if the cutting pattern has one cut.
851
852 True if the enzyme cut the sequence one time on each strand.
853 """
854 return False
855
856 @classmethod
858 """Return if the cutting pattern has two cuts.
859
860 True if the enzyme cut the sequence twice on each strand.
861 """
862 return True
863
864 @classmethod
866 """Return a generator that moves the cutting position by 1 (PRIVATE).
867
868 For internal use only.
869
870 location is an integer corresponding to the location of the match for
871 the enzyme pattern in the sequence.
872 _modify returns the real place where the enzyme will cut.
873
874 example::
875
876 EcoRI pattern : GAATTC
877 EcoRI will cut after the G.
878 so in the sequence:
879 ______
880 GAATACACGGAATTCGA
881 |
882 10
883 dna.finditer(GAATTC, 6) will return 10 as G is the 10th base
884 EcoRI cut after the G so:
885 EcoRI._modify(10) -> 11.
886
887 if the enzyme cut twice _modify will returns two integer corresponding
888 to each cutting site.
889 """
890 yield location + cls.fst5
891 yield location + cls.scd5
892
893 @classmethod
895 """Return a generator that moves the cutting position by 1 (PRIVATE).
896
897 for internal use only.
898
899 as _modify for site situated on the antiparallel strand when the
900 enzyme is not palindromic
901 """
902 yield location - cls.fst3
903 yield location - cls.scd3
904
905 @classmethod
907 """Return a list of the enzyme's characteristics as tuple.
908
909 the tuple contains the attributes:
910
911 - fst5 -> first 5' cut ((current strand) or None
912 - fst3 -> first 3' cut (complementary strand) or None
913 - scd5 -> second 5' cut (current strand) or None
914 - scd5 -> second 3' cut (complementary strand) or None
915 - site -> recognition site.
916
917 """
918 return cls.fst5, cls.fst3, cls.scd5, cls.scd3, cls.site
919
922 """Implement the information about methylation.
923
924 Enzymes of this class possess a site which is methylable.
925 """
926
927 @classmethod
929 """Return if recognition site can be methylated.
930
931 True if the recognition site is a methylable.
932 """
933 return True
934
937 """Implement information about methylation sensitibility.
938
939 Enzymes of this class are not sensible to methylation.
940 """
941
942 @classmethod
944 """Return if recognition site can be methylated.
945
946 True if the recognition site is a methylable.
947 """
948 return False
949
952 """Implement methods for enzymes with palindromic recognition sites.
953
954 palindromic means : the recognition site and its reverse complement are
955 identical.
956 Remarks : an enzyme with a site CGNNCG is palindromic even if some
957 of the sites that it will recognise are not.
958 for example here : CGAACG
959
960 Internal use only. Not meant to be instantiated.
961 """
962
963 @classmethod
965 """Return a list of cutting sites of the enzyme in the sequence (PRIVATE).
966
967 For internal use only.
968
969 Implement the search method for palindromic enzymes.
970 """
971 siteloc = cls.dna.finditer(cls.compsite, cls.size)
972 cls.results = [r for s, g in siteloc for r in cls._modify(s)]
973 if cls.results:
974 cls._drop()
975 return cls.results
976
977 @classmethod
979 """Return if the enzyme has a palindromic recoginition site."""
980 return True
981
984 """Implement methods for enzymes with non-palindromic recognition sites.
985
986 Palindromic means : the recognition site and its reverse complement are
987 identical.
988
989 Internal use only. Not meant to be instantiated.
990 """
991
992 @classmethod
994 """Return a list of cutting sites of the enzyme in the sequence (PRIVATE).
995
996 For internal use only.
997
998 Implement the search method for non palindromic enzymes.
999 """
1000 iterator = cls.dna.finditer(cls.compsite, cls.size)
1001 cls.results = []
1002 modif = cls._modify
1003 revmodif = cls._rev_modify
1004 s = str(cls)
1005 cls.on_minus = []
1006
1007 for start, group in iterator:
1008 if group(s):
1009 cls.results += [r for r in modif(start)]
1010 else:
1011 cls.on_minus += [r for r in revmodif(start)]
1012 cls.results += cls.on_minus
1013
1014 if cls.results:
1015 cls.results.sort()
1016 cls._drop()
1017 return cls.results
1018
1019 @classmethod
1021 """Return if the enzyme has a palindromic recoginition site."""
1022 return False
1023
1026 """Implement methods for enzymes that produce unknown overhangs.
1027
1028 These enzymes are also NotDefined and NoCut.
1029
1030 Internal use only. Not meant to be instantiated.
1031 """
1032
1033 @classmethod
1035 """List the sequence fragments after cutting dna with enzyme.
1036
1037 Return a tuple of dna as will be produced by using RE to restrict the
1038 dna.
1039
1040 dna must be a Bio.Seq.Seq instance or a Bio.Seq.MutableSeq instance.
1041
1042 If linear is False, the sequence is considered to be circular and the
1043 output will be modified accordingly.
1044 """
1045 raise NotImplementedError("%s restriction is unknown."
1046 % cls.__name__)
1047 catalyze = catalyse
1048
1049 @classmethod
1051 """Return if the enzyme produces blunt ends.
1052
1053 True if the enzyme produces blunt end.
1054
1055 Related methods:
1056
1057 - RE.is_3overhang()
1058 - RE.is_5overhang()
1059 - RE.is_unknown()
1060
1061 """
1062 return False
1063
1064 @classmethod
1066 """Return if the enzymes produces 5' overhanging ends.
1067
1068 True if the enzyme produces 5' overhang sticky end.
1069
1070 Related methods:
1071
1072 - RE.is_3overhang()
1073 - RE.is_blunt()
1074 - RE.is_unknown()
1075
1076 """
1077 return False
1078
1079 @classmethod
1081 """Return if the enzyme produces 3' overhanging ends.
1082
1083 True if the enzyme produces 3' overhang sticky end.
1084
1085 Related methods:
1086
1087 - RE.is_5overhang()
1088 - RE.is_blunt()
1089 - RE.is_unknown()
1090
1091 """
1092 return False
1093
1094 @classmethod
1096 """Return the type of the enzyme's overhang as string.
1097
1098 Can be "3' overhang", "5' overhang", "blunt", "unknown".
1099 """
1100 return "unknown"
1101
1102 @classmethod
1104 """List all enzymes that produce compatible ends for the enzyme."""
1105 return []
1106
1107 @classmethod
1109 """Test if other enzyme produces compatible ends for enzyme (PRIVATE).
1110
1111 For internal use only.
1112
1113 Test for the compatibility of restriction ending of RE and other.
1114 """
1115 return False
1116
1117
class Blunt(AbstractCut):
    """Implement methods for enzymes that produce blunt ends.

    The enzyme cuts the + strand and the - strand of the DNA at the same
    place.

    Internal use only. Not meant to be instantiated.
    """

    @classmethod
    def catalyse(cls, dna, linear=True):
        """List the sequence fragments after cutting dna with enzyme.

        Return a tuple of dna as will be produced by using RE to restrict the
        dna.

        dna must be a Bio.Seq.Seq instance or a Bio.Seq.MutableSeq instance.

        If linear is False, the sequence is considered to be circular and the
        output will be modified accordingly.
        """
        cuts = cls.search(dna, linear)
        # search() stores the (wrapped) sequence on the class.
        seq = cls.dna
        if not cuts:
            # No site: the whole sequence is the only fragment.
            return seq[1:],
        first, last = cuts[0], cuts[-1]
        # Fragments lying between two consecutive cuts.
        inner = [seq[start:end] for start, end in zip(cuts, cuts[1:])]
        if seq.is_linear():
            # START of the sequence to FIRST site, ..., LAST site to END.
            fragments = [seq[1:first]] + inner + [seq[last:]]
        else:
            # Circular: the fragment bridging LAST site to FIRST site
            # comes first, followed by the others.
            fragments = [seq[last:] + seq[1:first]] + inner
        return tuple(fragments)
    catalyze = catalyse

    @classmethod
    def is_blunt(cls):
        """Return if the enzyme produces blunt ends.

        Always True for this class.

        Related methods:

        - RE.is_3overhang()
        - RE.is_5overhang()
        - RE.is_unknown()

        """
        return True

    @classmethod
    def is_5overhang(cls):
        """Return if the enzymes produces 5' overhanging ends.

        Always False for this class.

        Related methods:

        - RE.is_3overhang()
        - RE.is_blunt()
        - RE.is_unknown()

        """
        return False

    @classmethod
    def is_3overhang(cls):
        """Return if the enzyme produces 3' overhanging ends.

        Always False for this class.

        Related methods:

        - RE.is_5overhang()
        - RE.is_blunt()
        - RE.is_unknown()

        """
        return False

    @classmethod
    def overhang(cls):
        """Return the type of the enzyme's overhang as string.

        Can be "3' overhang", "5' overhang", "blunt", "unknown".
        """
        return "blunt"

    @classmethod
    def compatible_end(cls, batch=None):
        """List all enzymes that produce compatible ends for the enzyme.

        If batch is supplied (and not empty) it is searched instead of the
        default AllEnzymes. The result is a sorted list of the enzymes in
        the batch that produce blunt ends.
        """
        if not batch:
            batch = AllEnzymes
        # Iterate over the selected batch; a supplied batch used to be
        # ignored in favour of AllEnzymes.
        return sorted(x for x in batch if x.is_blunt())

    @staticmethod
    def _mod1(other):
        """Test if other enzyme produces compatible ends for enzyme (PRIVATE).

        For internal use only

        Test for the compatibility of restriction ending of RE and other:
        True if other is a subclass of Blunt.
        """
        return issubclass(other, Blunt)
1246
1247
class Ov5(AbstractCut):
    """Implement methods for enzymes that produce 5' overhanging ends.

    The enzyme cuts the + strand after the - strand of the DNA.

    Internal use only. Not meant to be instantiated.
    """

    @classmethod
    def catalyse(cls, dna, linear=True):
        """List the sequence fragments after cutting dna with enzyme.

        Return a tuple of dna as will be produced by using RE to restrict the
        dna.

        dna must be a Bio.Seq.Seq instance or a Bio.Seq.MutableSeq instance.

        If linear is False, the sequence is considered to be circular and the
        output will be modified accordingly.
        """
        cuts = cls.search(dna, linear)
        # search() stores the (wrapped) sequence on the class.
        seq = cls.dna
        if not cuts:
            # No site: the whole sequence is the only fragment.
            return seq[1:],
        first, last = cuts[0], cuts[-1]
        # Fragments lying between two consecutive cuts.
        inner = [seq[start:end] for start, end in zip(cuts, cuts[1:])]
        if seq.is_linear():
            # START of the sequence to FIRST site, ..., LAST site to END.
            fragments = [seq[1:first]] + inner + [seq[last:]]
        else:
            # Circular: the fragment bridging LAST site to FIRST site
            # comes first, followed by the others.
            fragments = [seq[last:] + seq[1:first]] + inner
        return tuple(fragments)
    catalyze = catalyse

    @classmethod
    def is_blunt(cls):
        """Return if the enzyme produces blunt ends.

        Always False for this class.

        Related methods:

        - RE.is_3overhang()
        - RE.is_5overhang()
        - RE.is_unknown()

        """
        return False

    @classmethod
    def is_5overhang(cls):
        """Return if the enzymes produces 5' overhanging ends.

        Always True for this class.

        Related methods:

        - RE.is_3overhang()
        - RE.is_blunt()
        - RE.is_unknown()

        """
        return True

    @classmethod
    def is_3overhang(cls):
        """Return if the enzyme produces 3' overhanging ends.

        Always False for this class.

        Related methods:

        - RE.is_5overhang()
        - RE.is_blunt()
        - RE.is_unknown()

        """
        return False

    @classmethod
    def overhang(cls):
        """Return the type of the enzyme's overhang as string.

        Can be "3' overhang", "5' overhang", "blunt", "unknown".
        """
        return "5' overhang"

    @classmethod
    def compatible_end(cls, batch=None):
        """List all enzymes that produce compatible ends for the enzyme.

        If batch is supplied (and not empty) it is searched instead of the
        default AllEnzymes. The result is a sorted list of the enzymes in
        the batch that produce a 5' overhang and for which ``x % cls`` is
        true.
        """
        if not batch:
            batch = AllEnzymes
        # Iterate over the selected batch; a supplied batch used to be
        # ignored in favour of AllEnzymes.
        return sorted(x for x in batch if x.is_5overhang() and x % cls)

    @classmethod
    def _mod1(cls, other):
        """Test if other enzyme produces compatible ends for enzyme (PRIVATE).

        For internal use only.

        Test for the compatibility of restriction ending of RE and other.
        Enzymes that are not Ov5 subclasses are never compatible; for the
        others the answer is delegated to ``cls._mod2(other)``.
        """
        if not issubclass(other, Ov5):
            return False
        return cls._mod2(other)
1379
1380
1381 -class Ov3(AbstractCut):
1382 """Implement methods for enzymes that produce 3' overhanging ends.
1383
1384 The enzyme cuts the - strand after the + strand of the DNA.
1385
1386 Internal use only. Not meant to be instantiated.
1387 """
1388
1389 @classmethod
1391 """List the sequence fragments after cutting dna with enzyme.
1392
1393 Return a tuple of dna as will be produced by using RE to restrict the
1394 dna.
1395
1396 dna must be a Bio.Seq.Seq instance or a Bio.Seq.MutableSeq instance.
1397
1398 If linear is False, the sequence is considered to be circular and the
1399 output will be modified accordingly.
1400 """
1401 r = cls.search(dna, linear)
1402 d = cls.dna
1403 if not r:
1404 return d[1:],
1405 fragments = []
1406 length = len(r) - 1
1407 if d.is_linear():
1408
1409
1410
1411 fragments.append(d[1:r[0]])
1412 if length:
1413
1414
1415
1416 fragments += [d[r[x]:r[x + 1]] for x in range(length)]
1417
1418
1419
1420 fragments.append(d[r[-1]:])
1421 else:
1422
1423
1424
1425 fragments.append(d[r[-1]:] + d[1:r[0]])
1426 if not length:
1427
1428
1429
1430 return tuple(fragments)
1431
1432
1433
1434 fragments += [d[r[x]:r[x + 1]] for x in range(length)]
1435 return tuple(fragments)
1436 catalyze = catalyse
1437
1438 @classmethod
1440 """Return if the enzyme produces blunt ends.
1441
1442 True if the enzyme produces blunt end.
1443
1444 Related methods:
1445
1446 - RE.is_3overhang()
1447 - RE.is_5overhang()
1448 - RE.is_unknown()
1449
1450 """
1451 return False
1452
1453 @classmethod
1455 """Return if the enzymes produces 5' overhanging ends.
1456
1457 True if the enzyme produces 5' overhang sticky end.
1458
1459 Related methods:
1460
1461 - RE.is_3overhang()
1462 - RE.is_blunt()
1463 - RE.is_unknown()
1464
1465 """
1466 return False
1467
1468 @classmethod
1470 """Return if the enzyme produces 3' overhanging ends.
1471
1472 True if the enzyme produces 3' overhang sticky end.
1473
1474 Related methods:
1475
1476 - RE.is_5overhang()
1477 - RE.is_blunt()
1478 - RE.is_unknown()
1479
1480 """
1481 return True
1482
1483 @classmethod
1485 """Return the type of the enzyme's overhang as string.
1486
1487 Can be "3' overhang", "5' overhang", "blunt", "unknown".
1488 """
1489 return "3' overhang"
1490
@classmethod
def compatible_end(cls, batch=None):
    """List all enzymes that produce compatible ends for the enzyme.

    Only enzymes for which ``is_3overhang()`` is true are considered, and
    a candidate ``x`` is kept when ``x % cls`` evaluates to true.

    Arguments:
     - batch - iterable of enzymes to screen. When it is omitted (or
       empty) the module-level ``AllEnzymes`` batch is screened instead.

    Returns a sorted list of the compatible enzymes.
    """
    if not batch:
        batch = AllEnzymes
    # Screen the requested batch.  The previous code always iterated over
    # AllEnzymes here, so the ``batch`` argument was silently ignored.
    return sorted(x for x in batch if x.is_3overhang() and x % cls)
1499
1500 @classmethod
1502 """Test if other enzyme produces compatible ends for enzyme (PRIVATE).
1503
1504 For internal use only.
1505
1506 Test for the compatibility of restriction ending of RE and other.
1507 """
1508
1509
1510
1511 if issubclass(other, Ov3):
1512 return cls._mod2(other)
1513 else:
1514 return False
1515
1518 """Implement methods for enzymes with defined recognition site and cut.
1519
1520 Typical example : EcoRI -> G^AATT_C
1521 The overhang will always be AATT
1522 Notes:
1523 Blunt enzymes are always defined. Even if their site is GGATCCNNN^_N
1524 Their overhang is always the same : blunt!
1525
1526 Internal use only. Not meant to be instantiated.
1527 """
1528
1529 @classmethod
1531 """Remove cuts that are outsite of the sequence (PRIVATE).
1532
1533 For internal use only.
1534
1535 Drop the site that are situated outside the sequence in linear
1536 sequence. Modify the index for site in circular sequences.
1537 """
1538
1539
1540
1541
1542
1543
1544
1545
1546 length = len(cls.dna)
1547 drop = itertools.dropwhile
1548 take = itertools.takewhile
1549 if cls.dna.is_linear():
1550 cls.results = [x for x in drop(lambda x:x <= 1, cls.results)]
1551 cls.results = [x for x in take(lambda x:x <= length, cls.results)]
1552 else:
1553 for index, location in enumerate(cls.results):
1554 if location < 1:
1555 cls.results[index] += length
1556 else:
1557 break
1558 for index, location in enumerate(cls.results[::-1]):
1559 if location > length:
1560 cls.results[-(index + 1)] -= length
1561 else:
1562 break
1563 return
1564
1565 @classmethod
1567 """Return if recognition sequence and cut are defined.
1568
1569 True if the sequence recognised and cut is constant,
1570 i.e. the recognition site is not degenerated AND the enzyme cut inside
1571 the site.
1572
1573 Related methods:
1574
1575 - RE.is_ambiguous()
1576 - RE.is_unknown()
1577
1578 """
1579 return True
1580
1581 @classmethod
1583 """Return if recognition sequence and cut may be ambiguous.
1584
1585 True if the sequence recognised and cut is ambiguous,
1586 i.e. the recognition site is degenerated AND/OR the enzyme cut outside
1587 the site.
1588
1589 Related methods:
1590
1591 - RE.is_defined()
1592 - RE.is_unknown()
1593
1594 """
1595 return False
1596
1597 @classmethod
1599 """Return if recognition sequence is unknown.
1600
1601 True if the sequence is unknown,
1602 i.e. the recognition site has not been characterised yet.
1603
1604 Related methods:
1605
1606 - RE.is_defined()
1607 - RE.is_ambiguous()
1608
1609 """
1610 return False
1611
1612 @classmethod
1614 """Return a string representing the recognition site and cuttings.
1615
1616 Return a representation of the site with the cut on the (+) strand
1617 represented as '^' and the cut on the (-) strand as '_'.
1618 ie:
1619
1620 >>> from Bio.Restriction import EcoRI, KpnI, EcoRV, SnaI
1621 >>> EcoRI.elucidate() # 5' overhang
1622 'G^AATT_C'
1623 >>> KpnI.elucidate() # 3' overhang
1624 'G_GTAC^C'
1625 >>> EcoRV.elucidate() # blunt
1626 'GAT^_ATC'
1627 >>> SnaI.elucidate() # NotDefined, cut profile unknown.
1628 '? GTATAC ?'
1629 >>>
1630
1631 """
1632 f5 = cls.fst5
1633 f3 = cls.fst3
1634 site = cls.site
1635 if cls.cut_twice():
1636 re = "cut twice, not yet implemented sorry."
1637 elif cls.is_5overhang():
1638 if f5 == f3 == 0:
1639 re = "N^" + cls.site + "_N"
1640 elif f3 == 0:
1641 re = site[:f5] + "^" + site[f5:] + "_N"
1642 else:
1643 re = site[:f5] + "^" + site[f5:f3] + "_" + site[f3:]
1644 elif cls.is_blunt():
1645 re = site[:f5] + "^_" + site[f5:]
1646 else:
1647 if f5 == f3 == 0:
1648 re = "N_" + site + "^N"
1649 else:
1650 re = site[:f3] + "_" + site[f3:f5] + "^" + site[f5:]
1651 return re
1652
1653 @classmethod
1655 """Test if other enzyme produces compatible ends for enzyme (PRIVATE).
1656
1657 For internal use only.
1658
1659 Test for the compatibility of restriction ending of RE and other.
1660 """
1661
1662
1663
1664 if other.ovhgseq == cls.ovhgseq:
1665 return True
1666 elif issubclass(other, Ambiguous):
1667 return other._mod2(cls)
1668 else:
1669 return False
1670
1673 """Implement methods for enzymes that produce variable overhangs.
1674
1675 Typical example : BstXI -> CCAN_NNNN^NTGG
1676 The overhang can be any sequence of 4 bases.
1677
1678 Notes:
1679 Blunt enzymes are always defined. Even if their site is GGATCCNNN^_N
1680 Their overhang is always the same : blunt!
1681
1682 Internal use only. Not meant to be instantiated.
1683
1684 """
1685
1686 @classmethod
1713
1714 @classmethod
1716 """Return if recognition sequence and cut are defined.
1717
1718 True if the sequence recognised and cut is constant,
1719 i.e. the recognition site is not degenerated AND the enzyme cut inside
1720 the site.
1721
1722 Related methods:
1723
1724 - RE.is_ambiguous()
1725 - RE.is_unknown()
1726
1727 """
1728 return False
1729
1730 @classmethod
1732 """Return if recognition sequence and cut may be ambiguous.
1733
1734 True if the sequence recognised and cut is ambiguous,
1735 i.e. the recognition site is degenerated AND/OR the enzyme cut outside
1736 the site.
1737
1738 Related methods:
1739
1740 - RE.is_defined()
1741 - RE.is_unknown()
1742
1743 """
1744 return True
1745
1746 @classmethod
1748 """Return if recognition sequence is unknown.
1749
1750 True if the sequence is unknown,
1751 i.e. the recognition site has not been characterised yet.
1752
1753 Related methods:
1754
1755 - RE.is_defined()
1756 - RE.is_ambiguous()
1757
1758 """
1759 return False
1760
1761 @classmethod
1763 """Test if other enzyme produces compatible ends for enzyme (PRIVATE).
1764
1765 For internal use only.
1766
1767 Test for the compatibility of restriction ending of RE and other.
1768 """
1769
1770
1771
1772 if len(cls.ovhgseq) != len(other.ovhgseq):
1773 return False
1774 else:
1775 se = cls.ovhgseq
1776 for base in se:
1777 if base in "ATCG":
1778 pass
1779 if base in "N":
1780 se = ".".join(se.split("N"))
1781 if base in "RYWMSKHDBV":
1782 expand = "[" + matching[base] + "]"
1783 se = expand.join(se.split(base))
1784 if re.match(se, other.ovhgseq):
1785 return True
1786 else:
1787 return False
1788
@classmethod
def elucidate(cls):
    """Return a string representing the recognition site and cuttings.

    Return a representation of the site with the cut on the (+) strand
    represented as '^' and the cut on the (-) strand as '_'.
    ie:

    >>> from Bio.Restriction import EcoRI, KpnI, EcoRV, SnaI
    >>> EcoRI.elucidate()   # 5' overhang
    'G^AATT_C'
    >>> KpnI.elucidate()    # 3' overhang
    'G_GTAC^C'
    >>> EcoRV.elucidate()   # blunt
    'GAT^_ATC'
    >>> SnaI.elucidate()    # NotDefined, cut profile unknown.
    '? GTATAC ?'
    >>>

    The result is built from ``cls.site``, ``cls.fst5`` and ``cls.fst3``;
    cuts that fall outside the site are padded with 'N'.

    Raises ValueError for a blunt cutter whose ``fst5`` lies inside the
    site (neither negative nor greater than the site length).
    """
    f5 = cls.fst5
    f3 = cls.fst3
    length = len(cls)
    site = cls.site
    # The local is called ``rep`` so that it does not shadow the ``re``
    # module inside this function.
    if cls.cut_twice():
        rep = "cut twice, not yet implemented sorry."
    elif cls.is_5overhang():
        if f3 == f5 == 0:
            rep = "N^" + site + "_N"
        elif 0 <= f5 <= length and 0 <= f3 + length <= length:
            rep = site[:f5] + "^" + site[f5:f3] + "_" + site[f3:]
        elif 0 <= f5 <= length:
            rep = site[:f5] + "^" + site[f5:] + f3 * "N" + "_N"
        elif 0 <= f3 + length <= length:
            rep = "N^" + abs(f5) * "N" + site[:f3] + "_" + site[f3:]
        elif f3 + length < 0:
            # Was ``"N^" * abs(f5) * "N"``, i.e. str * int * str, which
            # raises TypeError; concatenate like the sibling branches.
            rep = "N^" + abs(f5) * "N" + "_" + abs(length + f3) * "N" + site
        elif f5 > length:
            rep = (site + (f5 - length) * "N" + "^"
                   + (length + f3 - f5) * "N" + "_N")
        else:
            rep = "N^" + abs(f5) * "N" + site + f3 * "N" + "_N"
    elif cls.is_blunt():
        if f5 < 0:
            rep = "N^_" + abs(f5) * "N" + site
        elif f5 > length:
            rep = site + (f5 - length) * "N" + "^_N"
        else:
            raise ValueError("%s.easyrepr() : error f5=%i"
                             % (cls.name, f5))
    else:
        # Neither 5' overhang nor blunt: 3' overhang layout ('_' first).
        if f3 == 0:
            if f5 == 0:
                rep = "N_" + site + "^N"
            else:
                rep = site + "_" + (f5 - length) * "N" + "^N"
        elif 0 < f3 + length <= length and 0 <= f5 <= length:
            rep = site[:f3] + "_" + site[f3:f5] + "^" + site[f5:]
        elif 0 < f3 + length <= length:
            rep = site[:f3] + "_" + site[f3:] + (f5 - length) * "N" + "^N"
        elif 0 <= f5 <= length:
            rep = "N_" + "N" * (f3 + length) + site[:f5] + "^" + site[f5:]
        elif f3 > 0:
            rep = site + f3 * "N" + "_" + (f5 - f3 - length) * "N" + "^N"
        elif f5 < 0:
            rep = ("N_" + abs(f3 - f5 + length) * "N" + "^"
                   + abs(f5) * "N" + site)
        else:
            rep = ("N_" + abs(f3 + length) * "N" + site
                   + (f5 - length) * "N" + "^N")
    return rep
1860
1863 """Implement methods for enzymes with non-characterized overhangs.
1864
1865 Correspond to NoCut and Unknown.
1866
1867 Internal use only. Not meant to be instantiated.
1868 """
1869
1870 @classmethod
1872 """Remove cuts that are outsite of the sequence (PRIVATE).
1873
1874 For internal use only.
1875
1876 Drop the site that are situated outside the sequence in linear
1877 sequence. Modify the index for site in circular sequences.
1878 """
1879 if cls.dna.is_linear():
1880 return
1881 else:
1882 length = len(cls.dna)
1883 for index, location in enumerate(cls.results):
1884 if location < 1:
1885 cls.results[index] += length
1886 else:
1887 break
1888 for index, location in enumerate(cls.results[:-1]):
1889 if location > length:
1890 cls.results[-(index + 1)] -= length
1891 else:
1892 break
1893 return
1894
1895 @classmethod
1897 """Return if recognition sequence and cut are defined.
1898
1899 True if the sequence recognised and cut is constant,
1900 i.e. the recognition site is not degenerated AND the enzyme cut inside
1901 the site.
1902
1903 Related methods:
1904
1905 - RE.is_ambiguous()
1906 - RE.is_unknown()
1907
1908 """
1909 return False
1910
1911 @classmethod
1913 """Return if recognition sequence and cut may be ambiguous.
1914
1915 True if the sequence recognised and cut is ambiguous,
1916 i.e. the recognition site is degenerated AND/OR the enzyme cut outside
1917 the site.
1918
1919 Related methods:
1920
1921 - RE.is_defined()
1922 - RE.is_unknown()
1923
1924 """
1925 return False
1926
1927 @classmethod
1929 """Return if recognition sequence is unknown.
1930
1931 True if the sequence is unknown,
1932 i.e. the recognition site has not been characterised yet.
1933
1934 Related methods:
1935
1936 - RE.is_defined()
1937 - RE.is_ambiguous()
1938
1939 """
1940 return True
1941
1942 @classmethod
1944 """Test if other enzyme produces compatible ends for enzyme (PRIVATE).
1945
1946 For internal use only.
1947
1948 Test for the compatibility of restriction ending of RE and other.
1949 """
1950
1951
1952
1953
1954
1955
1956
1957 raise ValueError("%s.mod2(%s), %s : NotDefined. pas glop pas glop!"
1958 % (str(cls), str(other), str(cls)))
1959
1960 @classmethod
1962 """Return a string representing the recognition site and cuttings.
1963
1964 Return a representation of the site with the cut on the (+) strand
1965 represented as '^' and the cut on the (-) strand as '_'.
1966 ie:
1967
1968 >>> from Bio.Restriction import EcoRI, KpnI, EcoRV, SnaI
1969 >>> EcoRI.elucidate() # 5' overhang
1970 'G^AATT_C'
1971 >>> KpnI.elucidate() # 3' overhang
1972 'G_GTAC^C'
1973 >>> EcoRV.elucidate() # blunt
1974 'GAT^_ATC'
1975 >>> SnaI.elucidate() # NotDefined, cut profile unknown.
1976 '? GTATAC ?'
1977 >>>
1978
1979 """
1980 return "? %s ?" % cls.site
1981
1984 """Implement methods for enzymes which are commercially available.
1985
1986 Internal use only. Not meant to be instantiated.
1987 """
1988
1989
1990
1991
1992
1993
1994 @classmethod
1996 """Print a list of suppliers of the enzyme."""
1997 for s in cls.suppl:
1998 print(suppliers_dict[s][0] + ",")
1999 return
2000
2001 @classmethod
2003 """Return a list of suppliers of the enzyme."""
2004 return [v[0] for k, v in suppliers_dict.items() if k in cls.suppl]
2005
2006 @classmethod
2008 """Return the recommended buffer of the supplier for this enzyme.
2009
2010 Not implemented yet.
2011 """
2012 return
2013
2014 @classmethod
2016 """Return if enzyme is commercially available.
2017
2018 True if RE has suppliers.
2019 """
2020 return True
2021
2024 """Implement methods for enzymes which are not commercially available.
2025
2026 Internal use only. Not meant to be instantiated.
2027 """
2028
2029 @staticmethod
2031 """Print a list of suppliers of the enzyme."""
2032 return None
2033
2034 @classmethod
2036 """Return a list of suppliers of the enzyme."""
2037 return []
2038
2039 @classmethod
2041 """Return the recommended buffer of the supplier for this enzyme.
2042
2043 Not implemented yet.
2044 """
2045 raise TypeError("Enzyme not commercially available.")
2046
2047 @classmethod
2049 """Return if enzyme is commercially available.
2050
2051 True if RE has suppliers.
2052 """
2053 return False
2054
2064 """Class for operations on more than one enzyme."""
2065
2066 - def __init__(self, first=(), suppliers=()):
2074
2076 """Return a readable representation of the ``RestrictionBatch``."""
2077 if len(self) < 5:
2078 return "+".join(self.elements())
2079 else:
2080 return "...".join(("+".join(self.elements()[:2]),
2081 "+".join(self.elements()[-2:])))
2082
2084 """Represent ``RestrictionBatch`` class as a string for debugging."""
2085 return "RestrictionBatch(%s)" % self.elements()
2086
2094
2096 """Override '/' operator to use as search method."""
2097 return self.search(other)
2098
2100 """Override division with reversed operands to use as search method."""
2101 return self.search(other)
2102
2104 """Override Python 3 division operator to use as search method.
2105
2106 Like __div__.
2107 """
2108 return self.search(other)
2109
2111 """As __truediv___, with reversed operands.
2112
2113 Like __rdiv__.
2114 """
2115 return self.search(other)
2116
def get(self, enzyme, add=False):
    """Return the batch member corresponding to ``enzyme``.

    ``enzyme`` is first normalised with ``self.format``.  If the result is
    in the batch it is returned.  Otherwise, when ``add`` is true it is
    added to the batch and returned; when ``add`` is false (the default) a
    ValueError is raised.  Any error raised by ``self.format`` propagates.
    """
    candidate = self.format(enzyme)
    if candidate in self:
        return candidate
    if not add:
        raise ValueError("enzyme %s is not in RestrictionBatch"
                         % candidate.__name__)
    self.add(candidate)
    return candidate
2134
def lambdasplit(self, func):
    """Filter enzymes in batch with supplied function.

    Returns a new RestrictionBatch containing only the enzymes of this
    batch for which ``func(enzyme)`` is true.  This batch is not modified.
    """
    new = RestrictionBatch()
    # The batch stores its members as a set (see add/remove, which call
    # set.add/set.remove), so the selection has to go into the set itself.
    # Assigning a ``_data`` dict, as was done before, left ``new`` empty.
    new.update(filter(func, self))
    return new
2145
2147 """Add all enzymes from a given supplier to batch.
2148
2149 letter represents the suppliers as defined in the dictionary
2150 RestrictionDictionary.suppliers
2151 Returns None.
2152 Raise a KeyError if letter is not a supplier code.
2153 """
2154 supplier = suppliers_dict[letter]
2155 self.suppliers.append(letter)
2156 for x in supplier[1]:
2157 self.add_nocheck(eval(x))
2158 return
2159
2161 """List the current suppliers for the restriction batch.
2162
2163 Return a sorted list of the suppliers which have been used to
2164 create the batch.
2165 """
2166 suppl_list = sorted(suppliers_dict[x][0] for x in self.suppliers)
2167 return suppl_list
2168
2170 """Override '+=' for use with sets.
2171
2172 b += other -> add other to b, check the type of other.
2173 """
2174 self.add(other)
2175 return self
2176
2178 """Overide '+' for use with sets.
2179
2180 b + other -> new RestrictionBatch.
2181 """
2182 new = self.__class__(self)
2183 new.add(other)
2184 return new
2185
2187 """Remove enzyme from restriction batch.
2188
2189 Safe set.remove method. Verify that other is a RestrictionType or can
2190 be evaluated to a RestrictionType.
2191 Raise a ValueError if other can not be evaluated to a RestrictionType.
2192 Raise a KeyError if other is not in B.
2193 """
2194 return set.remove(self, self.format(other))
2195
def add(self, other):
    """Add a restriction enzyme to the restriction batch.

    ``other`` is normalised with ``self.format`` before being stored, so
    whatever that method raises for an unusable value propagates from
    here.  Returns the result of ``set.add`` (None).
    """
    enzyme = self.format(other)
    return set.add(self, enzyme)
2204
2206 """Add restriction enzyme to batch without checking its type."""
2207 return set.add(self, other)
2208
2226
2228 """Return if enzyme (name) is a known enzyme.
2229
2230 True if y or eval(y) is a RestrictionType.
2231 """
2232 return (isinstance(y, RestrictionType) or
2233 isinstance(eval(str(y)), RestrictionType))
2234
def split(self, *classes, **bool):
    """Extract enzymes of a certain class and put in new RestrictionBatch.

    Each positional argument is a class to test the enzymes against.  A
    keyword argument named after one of those classes (its ``__name__``)
    selects the sense of the test: a true value (the default) keeps only
    subclasses of that class, a false value keeps only enzymes that are
    NOT subclasses of it.  An enzyme is kept when it passes every test.

    It works but it is slow, so it has really an interest when splitting
    over multiple conditions.
    """
    def splittest(element):
        for klass in classes:
            wanted = bool.get(klass.__name__, True)
            if issubclass(element, klass):
                if not wanted:
                    return False
            elif wanted:
                return False
        return True

    new = RestrictionBatch()
    # The batch stores its members as a set (see add/remove, which call
    # set.add/set.remove), so the selection has to go into the set itself.
    # Assigning a ``_data`` dict, as was done before, left ``new`` empty.
    new.update(filter(splittest, self))
    return new
2258
2260 """List the enzymes of the RestrictionBatch as list of strings.
2261
2262 Give all the names of the enzymes in B sorted alphabetically.
2263 """
2264 return sorted(str(e) for e in self)
2265
2267 """List the names of the enzymes of the RestrictionBatch.
2268
2269 Return a list of the name of the elements of the batch.
2270 """
2271 return [str(e) for e in self]
2272
2273 @classmethod
2275 """Return a dicionary with supplier codes.
2276
2277 Letter code for the suppliers.
2278 """
2279 supply = {k: v[0] for k, v in suppliers_dict.items()}
2280 return supply
2281
2282 @classmethod
2284 """Print a list of supplier codes."""
2285 supply = [" = ".join(i) for i in cls.suppl_codes().items()]
2286 print("\n".join(supply))
2287 return
2288
def search(self, dna, linear=True):
    """Return a dict mapping each enzyme of the batch to its search result.

    ``dna`` is either a ``DNA`` instance, which is wrapped in a
    ``FormattedSeq`` together with ``linear``, or a ``FormattedSeq``, whose
    own ``linear`` attribute is used (the ``linear`` argument is then
    ignored).  Anything else raises TypeError.

    The result is cached in ``self.mapping``; ``self.already_mapped`` holds
    the ``(str(dna), linear)`` pair it was computed for, and a repeated
    call with the same pair returns the cached dict.
    """
    if not hasattr(self, "already_mapped"):
        # First use of this batch: no search has been cached yet.
        self.already_mapped = None
    if isinstance(dna, DNA):
        signature = str(dna), linear
        target = None
    elif isinstance(dna, FormattedSeq):
        signature = str(dna), dna.linear
        target = dna
    else:
        raise TypeError("Expected Seq or MutableSeq instance, got %s instead"
                        % type(dna))
    if signature != self.already_mapped:
        self.already_mapped = signature
        if target is None:
            target = FormattedSeq(dna, linear)
        self.mapping = {x: x.search(target) for x in self}
    return self.mapping
2320
2321
2322
2323
2324
2325
2326
2327
2328 _empty_DNA = DNA("")
2329 _restrictionbatch = RestrictionBatch()
2330
2331
2332 -class Analysis(RestrictionBatch, PrintFormat):
2333 """Provide methods for enhanced analysis and pretty printing."""
2334
2337 """Initialize an Analysis with RestrictionBatch and sequence.
2338
2339 For most of the methods of this class if a dictionary is given it will
2340 be used as the base to calculate the results.
2341 If no dictionary is given a new analysis using the RestrictionBatch
2342 which has been given when the Analysis class has been instantiated,
2343 will be carried out and used.
2344 """
2345 RestrictionBatch.__init__(self, restrictionbatch)
2346 self.rb = restrictionbatch
2347 self.sequence = sequence
2348 self.linear = linear
2349 if self.sequence:
2350 self.search(self.sequence, self.linear)
2351
2353 """Represent ``Analysis`` class as a string."""
2354 return "Analysis(%s,%s,%s)" %\
2355 (repr(self.rb), repr(self.sequence), self.linear)
2356
2358 """Filter result for keys which are in wanted (PRIVATE).
2359
2360 Internal use only. Returns a dict.
2361
2362 Screen the results through wanted set.
2363 Keep only the results for which the enzymes is in wanted set.
2364 """
2365
2366 return {k: v for k, v in self.mapping.items() if k in wanted}
2367
2369 """Set boundaries to correct values (PRIVATE).
2370
2371 Format the boundaries for use with the methods that limit the
2372 search to only part of the sequence given to analyse.
2373 """
2374 if not isinstance(start, int):
2375 raise TypeError("expected int, got %s instead" % type(start))
2376 if not isinstance(end, int):
2377 raise TypeError("expected int, got %s instead" % type(end))
2378 if start < 1:
2379 start += len(self.sequence)
2380 if end < 1:
2381 end += len(self.sequence)
2382 if start < end:
2383 pass
2384 else:
2385 start, end = end, start
2386 if start < end:
2387 return start, end, self._test_normal
2388
2390 """Test if site is between start and end (PRIVATE).
2391
2392 Internal use only
2393 """
2394 return start <= site < end
2395
2397 """Test if site is between end and start, for circular sequences (PRIVATE).
2398
2399 Internal use only.
2400 """
2401 return start <= site <= len(self.sequence) or 1 <= site < end
2402
2411
def print_that(self, dct=None, title="", s1=""):
    """Print the output of the analysis.

    The three arguments are handed unchanged to ``self.format_output`` and
    the value it returns is printed.

    This method is kept for backwards compatibility.
    """
    report = self.format_output(dct, title, s1)
    print(report)
2421
def change(self, **what):
    """Change parameters of print output.

    It is possible to change the width of the shell by setting
    self.ConsoleWidth to what you want.
    self.NameWidth refer to the maximal length of the enzyme name.

    Changing one of these parameters here might not give the results
    you expect. In which case, you can settle back to a 80 columns shell
    or try to change self.Cmodulo and self.PrefWidth in PrintFormat until
    you get it right.

    Recognised keywords:
     - NameWidth, ConsoleWidth: set, then Cmodulo/PrefWidth are recomputed.
     - sequence, linear: set, then the search is run again.
     - rb: re-initialise this object with the new batch.
     - Indent, Maxsize: set as given.

    Raises AttributeError for Cmodulo/PrefWidth (derived values) and for
    any other keyword.
    """
    for k, v in what.items():
        if k in ("NameWidth", "ConsoleWidth"):
            setattr(self, k, v)
            self.Cmodulo = self.ConsoleWidth % self.NameWidth
            self.PrefWidth = self.ConsoleWidth - self.Cmodulo
        elif k == "sequence":
            self.sequence = v
            self.search(self.sequence, self.linear)
        elif k == "rb":
            # Re-initialise in place.  __init__ returns None, so its result
            # must not be bound to ``self`` (the old code did, which broke
            # every keyword processed after "rb" in the same call).
            Analysis.__init__(self, v, self.sequence, self.linear)
        elif k == "linear":
            self.linear = v
            self.search(self.sequence, v)
        elif k in ("Indent", "Maxsize"):
            setattr(self, k, v)
        elif k in ("Cmodulo", "PrefWidth"):
            raise AttributeError(
                "To change %s, change NameWidth and/or ConsoleWidth" % k)
        else:
            raise AttributeError("Analysis has no attribute %s" % k)
    return
2455
def full(self, linear=True):
    """Return the complete restriction map held by this analysis.

    The value is the ``self.mapping`` dictionary itself (not a copy).
    The ``linear`` argument is accepted but not used here.
    """
    whole_map = self.mapping
    return whole_map
2462
def blunt(self, dct=None):
    """Return only the entries whose enzyme reports ``is_blunt()``.

    ``dct`` maps enzymes to results; when it is missing or empty,
    ``self.mapping`` is filtered instead.  A new dict is returned.
    """
    source = dct if dct else self.mapping
    selected = {}
    for enzyme, cuts in source.items():
        if enzyme.is_blunt():
            selected[enzyme] = cuts
    return selected
2468
2470 """Return only cuts that have 5' overhangs."""
2471 if not dct:
2472 dct = self.mapping
2473 return {k: v for k, v in dct.items() if k.is_5overhang()}
2474
2476 """Return only cuts that have 3' overhangs."""
2477 if not dct:
2478 dct = self.mapping
2479 return {k: v for k, v in dct.items() if k.is_3overhang()}
2480
2482 """Return only results from enzymes that produce defined overhangs."""
2483 if not dct:
2484 dct = self.mapping
2485 return {k: v for k, v in dct.items() if k.is_defined()}
2486
2488 """Return only results from enzyme with at least one cut."""
2489 if not dct:
2490 dct = self.mapping
2491 return {k: v for k, v in dct.items() if v}
2492
2494 """Return only results from enzymes that don't cut the sequence."""
2495 if not dct:
2496 dct = self.mapping
2497 return {k: v for k, v in dct.items() if not v}
2498
2500 """Return only results from enzymes that cut the sequence N times."""
2501 if not dct:
2502 dct = self.mapping
2503 return {k: v for k, v in dct.items() if len(v) == N}
2504
2506 """Return only results from enzymes that cut (x,y,z,...) times."""
2507 if not dct:
2508 dct = self.mapping
2509 return {k: v for k, v in dct.items() if len(v) in list}
2510
def with_name(self, names, dct=None):
    """Return only results from enzymes which names are listed.

    Every entry of ``names`` that is not in ``AllEnzymes`` triggers a
    BiopythonWarning and is left out.  ``names`` itself is not modified.

    Without ``dct`` a new RestrictionBatch built from the remaining names
    is searched against ``self.sequence``; with ``dct`` the entries of
    ``dct`` whose key is one of the remaining names are returned.
    """
    # Build a filtered list instead of deleting from ``names`` while
    # enumerating it: deleting shifted the following items, so the name
    # right after an unknown one was never checked.
    known = []
    for enzyme in names:
        if enzyme in AllEnzymes:
            known.append(enzyme)
        else:
            warnings.warn("no data for the enzyme: %s" % enzyme,
                          BiopythonWarning)
    if not dct:
        return RestrictionBatch(known).search(self.sequence, self.linear)
    return {n: dct[n] for n in known if n in dct}
2521
def with_site_size(self, site_size, dct=None):
    """Return only results from enzymes with a given site size.

    The enzymes of this batch whose ``size`` equals ``site_size`` are
    selected.  Without ``dct`` a new RestrictionBatch of those enzymes is
    searched against ``self.sequence``; with ``dct`` the entries of
    ``dct`` whose key is one of those enzymes are returned.
    """
    sites = [name for name in self if name.size == site_size]
    if not dct:
        # Pass the topology along, as the name-based filter does.
        return RestrictionBatch(sites).search(self.sequence, self.linear)
    # Filter on the selected enzymes.  The old test ``k in site_size``
    # looked for the key inside an integer and raised TypeError.
    return {k: v for k, v in dct.items() if k in sites}
2528
2530 """Return only results from enzymes that only cut within start, end."""
2531 start, end, test = self._boundaries(start, end)
2532 if not dct:
2533 dct = self.mapping
2534 d = dict(dct)
2535 for key, sites in dct.items():
2536 if not sites:
2537 del d[key]
2538 continue
2539 for site in sites:
2540 if test(start, end, site):
2541 continue
2542 else:
2543 del d[key]
2544 break
2545 return d
2546
def between(self, start, end, dct=None):
    """Return only results from enzymes that cut at least within borders.

    ``start`` and ``end`` are normalised by ``self._boundaries``, which
    also supplies the test applied to each site.  An enzyme is kept, with
    all of its sites, as soon as one site passes the test; it may have
    sites outside the region as well.  ``self.mapping`` is used when
    ``dct`` is missing or empty.
    """
    start, end, test = self._boundaries(start, end)
    source = dct if dct else self.mapping
    return {
        enzyme: cuts
        for enzyme, cuts in source.items()
        if any(test(start, end, cut) for cut in cuts)
    }
2564
2566 """Return only results from within start, end.
2567
2568 Enzymes must cut inside start/end and may also cut outside. However,
2569 only the cutting positions within start/end will be returned.
2570 """
2571 d = []
2572 if start <= end:
2573 d = [(k, [vv for vv in v if start <= vv <= end])
2574 for k, v in self.between(start, end, dct).items()]
2575 else:
2576 d = [(k, [vv for vv in v if start <= vv or vv <= end])
2577 for k, v in self.between(start, end, dct).items()]
2578 return dict(d)
2579
2581 """Return only results from enzymes that only cut outside start, end.
2582
2583 Enzymes that cut the sequence outside of the region
2584 in between start and end but do not cut inside.
2585 """
2586 start, end, test = self._boundaries(start, end)
2587 if not dct:
2588 dct = self.mapping
2589 d = dict(dct)
2590 for key, sites in dct.items():
2591 if not sites:
2592 del d[key]
2593 continue
2594 for site in sites:
2595 if test(start, end, site):
2596 del d[key]
2597 break
2598 else:
2599 continue
2600 return d
2601
def outside(self, start, end, dct=None):
    """Return only results from enzymes that at least cut outside borders.

    ``start`` and ``end`` are normalised by ``self._boundaries``, which
    also supplies the test applied to each site.  An enzyme is kept, with
    all of its sites, as soon as one site fails the test; it may have
    sites inside the region as well.  ``self.mapping`` is used when
    ``dct`` is missing or empty.
    """
    start, end, test = self._boundaries(start, end)
    source = dct if dct else self.mapping
    return {
        enzyme: cuts
        for enzyme, cuts in source.items()
        if any(not test(start, end, cut) for cut in cuts)
    }
2620
def do_not_cut(self, start, end, dct=None):
    """Return only results from enzymes that don't cut between borders.

    The result combines the entries returned by ``self.without_site`` and
    those returned by ``self.only_outside(start, end, ...)``, both
    computed on ``dct`` (``self.mapping`` when ``dct`` is missing or
    empty).
    """
    if not dct:
        dct = self.mapping
    # Hand ``dct`` to without_site as well; it used to be called without
    # arguments and therefore always worked on self.mapping, even when the
    # caller supplied a dictionary.
    d = self.without_site(dct)
    d.update(self.only_outside(start, end, dct))
    return d
2628
2629
2630
2631
2632
2633
2634
2635
2636
2637
2638
2639
2640
2641
2642
2643
2644
2645
2646
2647
2648
2649
2650
2651
2652 CommOnly = RestrictionBatch()
2653 NonComm = RestrictionBatch()
2654 for TYPE, (bases, enzymes) in typedict.items():
2655
2656
2657
2658
2659
2660
2661
2662
2663
2664
2665
2666
2667
2668
2669
2670
2671
2672 bases = tuple(eval(x) for x in bases)
2673
2674
2675
2676
2677 T = type.__new__(RestrictionType, "RestrictionType", bases, {})
2678 for k in enzymes:
2679
2680
2681
2682
2683
2684 newenz = T(k, bases, enzymedict[k])
2685
2686
2687
2688
2689
2690 if newenz.is_comm():
2691 CommOnly.add_nocheck(newenz)
2692 else:
2693 NonComm.add_nocheck(newenz)
2694
2695
2696
2697 AllEnzymes = RestrictionBatch(CommOnly)
2698 AllEnzymes.update(NonComm)
2699
2700
2701
2702 names = [str(x) for x in AllEnzymes]
2703 try:
2704 del x
2705 except NameError:
2706
2707 pass
2708 locals().update(dict(zip(names, AllEnzymes)))
2709 __all__ = ("FormattedSeq", "Analysis", "RestrictionBatch", "AllEnzymes",
2710 "CommOnly", "NonComm") + tuple(names)
2711 del k, enzymes, TYPE, bases, names
2712